# -*- coding:utf-8 -*-
# @Time:2024/4/19 21:01
# @Author:miuzg
# @FileName:new test2.py
# @Software:PyCharm
from bs4 import BeautifulSoup

# Sample markup used as input for the CSS-selector demonstrations below.
# It contains: a <title> inside <head>, a "title" paragraph, a "story"
# paragraph holding three <a class="sister"> links (ids link1..link3),
# a free-standing <span>, a second "story" paragraph wrapping a <span>,
# and an <img> tag with a src attribute.
html = """
<html><head><title>The Dormouse’s story</title></head>

<p class="title"><b>The Dormouse’s story</b></p>

<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<span>我是外面的span</span>
<p class="story">
    <span>多喝热水</span>
</p>
<img src="procrastination.img">

"""

# Parse the sample markup using the lxml parser backend.
soup = BeautifulSoup(html, features='lxml')

# CSS selectors to demonstrate, in the order their results are printed.
# Each one is passed to soup.select() and the returned list of matching
# tags is printed as-is.
demo_selectors = (
    'a',                               # every <a> tag
    '.sister',                         # lookup by class name
    '#link2',                          # lookup by id
    'head>title',                      # combined: ">" selects direct children
    'p>span',
    'p #link1',                        # combined: a space selects descendants
    'img[src="procrastination.img"]',  # lookup by attribute value
)
for css in demo_selectors:
    print(soup.select(css))

# Print only the text content of the first #link1 match, without its tag.
first_link = soup.select('#link1')[0]
print(first_link.get_text())